Kontraste zwischen Grundschule und Gymnasium
# Knitr chunk options: show code, suppress messages and warnings.
# TRUE/FALSE spelled out instead of T/F (T and F are ordinary variables
# that can be reassigned; TRUE/FALSE cannot).
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(rio)
library(tidyverse)
library(lubridate)
library(ggstance)
library(mice)
library(miceadds)
library(naniar)
library(psych)
library(ggrepel)
library(BayesFactor)
library(bindata)
library(pander)
library(corrgram)
library(hrbrthemes)
library(MASS)
library(bain)
R.Version() # record the R version for reproducibility of the report
$platform
[1] "x86_64-w64-mingw32"
$arch
[1] "x86_64"
$os
[1] "mingw32"
$system
[1] "x86_64, mingw32"
$status
[1] ""
$major
[1] "3"
$minor
[1] "6.2"
$year
[1] "2019"
$month
[1] "12"
$day
[1] "12"
$`svn rev`
[1] "77560"
$language
[1] "R"
$version.string
[1] "R version 3.6.2 (2019-12-12)"
$nickname
[1] "Dark and Stormy Night"
# import
# read the raw coding sheets (Excel) for Gymnasium and Grundschule
gym <- rio::import("data/gym.xls")
gs <- rio::import("data/gs.xls")
gs <- gs %>%
  # drop the example row that illustrates the coding scheme
  # NOTE(review): `!=` also silently drops rows where `Laufende\nNummer`
  # is NA — confirm that is intended
  dplyr::filter(`Laufende\nNummer` != "Beispiel") %>%
  mutate(id = as.numeric(`Laufende\nNummer`))
## Correction of data based on deviations found during checking
# Miscalculations of `Min Beginn seit Std-Anfang` (id == 189, 248, 329) are skipped,
# because the self-computed variable `beg_hw_min` is used instead
# because of the differing time zone, one additional hour has to be added
gs[which(gs$id == 105), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:48:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 105), "Uhrzeit Ende Vergabe"] <- "1899-12-31 11:51:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 120), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:58:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 168), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 11:43:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 250), "Uhrzeit Ende Vergabe"] <- "1899-12-31 12:33:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 187), "Uhrzeit Beginn der Std laut Plan"] <- "1899-12-31 09:20:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 187), "Uhrzeit Ende der Std laut Plan"] <- "1899-12-31 10:50:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 187), "Uhrzeit Beginn HA Vergabe"] <- "1899-12-31 10:24:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 187), "Uhrzeit Ende Vergabe"] <- "1899-12-31 10:40:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 307), "Uhrzeit Ende Vergabe"] <- "1899-12-31 10:16:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 315), "Uhrzeit Ende Vergabe"] <- "1899-12-31 11:55:00" # typo, re-checked in the questionnaire
gs[which(gs$id == 220), "Ankündigung"] <- 1 # typo
gs[which(gs$id == 198), "L schreibt"] <- 1 # typo
gs[which(gs$id == 330), "L will Notation"] <- NA # value "9" does not exist; no plausible value could be reconstructed
# new var
# Derive analysis variables for the Grundschule data:
# - parse the Excel datetimes ("1899-12-31 HH:MM:SS") into hour-minute periods
# - compute lesson length and homework-assignment timing in minutes
# - rename the observation variables to short shared analysis names
gs <- gs %>%
  mutate(beg_plan = hm(format(strptime(`Uhrzeit Beginn der Std laut Plan`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),
         end_plan = hm(format(strptime(`Uhrzeit Ende der Std laut Plan`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),
         beg_act = hm(format(strptime(`Uhrzeit Realer Beginn der Std`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),
         beg_hw = hm(format(strptime(`Uhrzeit Beginn HA Vergabe`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),
         end_hw = hm(format(strptime(`Uhrzeit Ende Vergabe`,"%Y-%m-%d %H:%M:%S"),'%H:%M')),
         stunde = lubridate::hour(end_plan - beg_plan)*60 + lubridate::minute(end_plan - beg_plan), # planned length of the lesson (minutes)
         beg_hw_min = lubridate::hour(beg_hw - beg_plan)*60 + lubridate::minute(beg_hw - beg_plan), # start of homework assignment relative to PLANNED lesson start
         dau_hw_min = lubridate::hour(end_hw - beg_hw)*60 + lubridate::minute(end_hw - beg_hw), # duration of the homework assignment (minutes)
         lh_ank = `Ankündigung`,
         lh_auf = `L verlangt Aufmerk`,
         lh_nen = `L nennt`,
         lh_sch = `L schreibt`,
         lh_erl = `L erläutert`,
         lh_wfr = `L will Fragen`,
         lh_bfr = `L beantwortet`,
         lh_wno = `L will Notation`,
         lh_ano = `L achtet Notat`,
         sh_auf = `S aufmerksam`,
         sh_mel = `S melden`,
         sh_fra = `S fragen`,
         sh_not = `S notieren`)
gym <- gym %>%
  mutate(id = as.numeric(`Laufende\nNummer`))
# `L schreibt` was mis-entered for these lessons; re-checked in the
# questionnaire. %in% replaces the long chain of `== | ==` comparisons:
# it selects exactly the same rows (NA ids match under neither form).
gym[which(gym$id %in% c(141, 148, 149, 150, 164, 175, 180, 181,
                        182, 188, 189)), "L schreibt"] <- 1 # typo, checked in questionnaire
# new var
# Mirror the Grundschule variable names so both data sets can be stacked.
gym <- gym %>%
  mutate(beg_hw_min = `Min Beginn`, # start of homework assignment (minutes since lesson start)
         dau_hw_min = `HA stellen Dauer`, # duration of the homework assignment (minutes)
         # planned lesson length derived from the double-lesson indicator
         stunde = case_when(Doppelstundenmodell == 0 ~ 45,
                            Doppelstundenmodell == 1 ~ 90),
         Schulart = "Gymnasium",
         lh_ank = `Ankündigung`,
         lh_auf = `L sorg Aufmerk`,
         lh_nen = `L nennt`,
         lh_sch = `L schreibt`,
         lh_erl = `Erläuterung gesamt`,
         lh_wfr = `L will Fragen`,
         lh_bfr = `L beantwortet`,
         lh_wno = `L will Notation`,
         lh_ano = `L achtet Notat`,
         sh_auf = `S aufmerksam`,
         sh_mel = `S melden`,
         sh_fra = `Fragen gesamt`,
         sh_not = `S notieren`)
# Analysis variables shared by both school types, defined once so the two
# selections cannot drift apart.
analysis_vars <- c("beg_hw_min", "dau_hw_min", "stunde", "Schulart", "Klassenstufe",
                   "lh_ank", "lh_auf", "lh_nen", "lh_sch", "lh_erl", "lh_wfr",
                   "lh_bfr", "lh_wno", "lh_ano", "sh_auf", "sh_mel", "sh_fra", "sh_not")
gs_p <- gs %>%
  dplyr::select(dplyr::one_of(analysis_vars)) # one_of(): compatible with this dplyr era (all_of() in dplyr >= 1.0)
gym_p <- gym %>%
  dplyr::select(dplyr::one_of(analysis_vars))
# bind_rows() matches columns by name — safer than rbind()'s positional
# matching; identical result here because both selections share the order
p_data <- bind_rows(gs_p, gym_p)
# filter away the 10 cross grade classes as they are difficult to analyze
# (combine characteristics of two grades in one)
p_data <- p_data %>%
  dplyr::filter(Klassenstufe != "1_2" | is.na(Klassenstufe)) %>%
  mutate(Klassenstufe = as.numeric(Klassenstufe))
### SKALENNIVEAUS ###
# lh_ank: dichotom [0,1]
# lh_auf: dichotom [0,1]
# lh_nen: dichotom [0,1]
# lh_sch: dichotom [0,1]
# lh_erl: dichotom [0,1]
# lh_wfr: dichotom [0,1]
# lh_bfr: metrisch, absolutskaliert, schief
# lh_wno: dichotom [0,1]
# lh_ano: dichotom [0,1]
# sh_auf: ordinalskaliert [1:5]
# sh_mel: metrisch, absolutskaliert, schief
# sh_fra: metrisch, absolutskaliert, schief
# sh_not: metrisch, intervallskaliert, bimodal
# Visualise the missing-data pattern before imputation.
vis_miss(p_data)
gg_miss_upset(p_data)
# Ensure the column types the imputation models expect.
p_data <- p_data %>%
  mutate(Schulart = as.factor(Schulart),
         Klassenstufe = as.numeric(Klassenstufe))
# Table documenting the assumed scale type and the chosen mice imputation
# method for every analysis variable (rendered with pander below).
# NOTE(review): sh_auf is described as ordinal in the scale-level comments
# above but listed as "metric"/pmm here — presumably deliberate; confirm.
model_tab <- data.frame(variable = c("beg_hw_min", "dau_hw_min", "stunde", "Schulart", "Klassenstufe", "lh_ank", "lh_auf", "lh_nen", "lh_sch",
                                     "lh_erl", "lh_wfr", "lh_bfr", "lh_wno", "lh_ano", "sh_auf", "sh_mel", "sh_fra", "sh_not"),
                        # data.frame(check.names = TRUE) turns this into `scale.type`
                        'scale type' = c("metric", "metric", "metric", "nominal", "metric", "binary", "binary", "binary", "binary",
                                         "binary", "binary", "metric", "binary", "binary", "metric", "metric", "metric", "metric"),
                        method = c("pmm", "pmm", "pmm", "polyreg", "pmm", "logreg", "logreg", "logreg", "logreg",
                                   "logreg", "logreg", "pmm", "logreg", "logreg", "pmm", "pmm", "pmm", "pmm"))
pander(model_tab)
| variable | scale.type | method |
|---|---|---|
| beg_hw_min | metric | pmm |
| dau_hw_min | metric | pmm |
| stunde | metric | pmm |
| Schulart | nominal | polyreg |
| Klassenstufe | metric | pmm |
| lh_ank | binary | logreg |
| lh_auf | binary | logreg |
| lh_nen | binary | logreg |
| lh_sch | binary | logreg |
| lh_erl | binary | logreg |
| lh_wfr | binary | logreg |
| lh_bfr | metric | pmm |
| lh_wno | binary | logreg |
| lh_ano | binary | logreg |
| sh_auf | metric | pmm |
| sh_mel | metric | pmm |
| sh_fra | metric | pmm |
| sh_not | metric | pmm |
# Defining methods
# Imputation method per column; the order must match the columns of p_data
# (pmm = predictive mean matching, logreg = logistic regression,
# polyreg = polytomous regression for the nominal Schulart).
meth <- c("pmm", "pmm", "pmm", "polyreg", "pmm", "logreg", "logreg", "logreg", "logreg",
          "logreg", "logreg", "pmm", "logreg", "logreg", "pmm", "pmm", "pmm", "pmm")
number of imputations
1000
m <- 1000 # number of multiple imputations
# inspect pairwise correlations before setting the predictor matrix
# cor(y = p_data[,-4], x = !is.na(p_data[,-4]), use = "pair")
corrgram(p_data, lower.panel = "panel.pie", upper.panel = "panel.cor")
# set predictor specifications
# dry run (maxit = 0) just to obtain mice's default predictor matrix
ini <- mice(p_data,
            maxit = 0,
            m = m,
            meth = meth,
            seed = 666
            )
pred <- ini$predictorMatrix
# a zeroed column removes that variable as predictor from all imputation models
pred[,"stunde"] <- 0 # stunde highly correlated with beg_hw_min
pred[,"lh_bfr"] <- 0 # lh_bfr highly correlated with sh_fra
variables that are function of other variables
None.
which variables to impute
All.
number of iterations
20
maxit <- 20 # number of mice iterations per chain
# Main imputation run with the custom methods and predictor matrix.
imp <- mice(p_data,
            maxit = maxit,
            m = m,
            meth = meth,
            pred = pred,
            seed = 666,
            printFlag = FALSE # FALSE spelled out: F can be reassigned, FALSE cannot
            )
## will produce one plot for each imputation (200) on each variable
## uncomment if interested, otherwise I spare the space
# lattice::stripplot(imp, pch = 20, cex = 1.2)
All values seem plausible.
check convergence
plot(imp) # trace plots of the chains to check convergence of the imputation
Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.
Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(\varphi=.2\) an. Diese liegt zwischen einem kleinen (\(\varphi=.1\)) und mittleren (\(\varphi=.3\)) Effekt nach Cohen (1988).
true_hyp <- c("H0 is true", "H1 is true") # the two simulated ground truths
phi <- c(0, .2) # effect sizes (phi coefficient) under H0 and H1
Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich, verzichteten wir auf eine größere Anzahl an Simulationen.
n_sim <- 1000 # number of simulations per hypothesis
# NOTE(review): there is no set.seed() before this simulation, so the
# exact numbers are not reproducible — consider adding one.
# Preallocate the result frame (2 * n_sim rows): growing a data frame
# row-by-row inside the loop is O(n^2).
sim_cor_results <- data.frame(BayesFactor = numeric(2 * n_sim),
                              trueH = character(2 * n_sim),
                              stringsAsFactors = FALSE)
for (j in true_hyp) { # loop over both ground truths
  # correlation between the two binary variables under the current truth
  # (phi = 0 under H0, phi = .2 under H1)
  rho <- if (j == "H0 is true") phi[1] else phi[2]
  bincorr <- matrix(c(1, rho, rho, 1), ncol = 2)
  # rows 1..n_sim hold H0 runs, rows (n_sim+1)..2*n_sim hold H1 runs
  offset <- if (j == "H0 is true") 0 else n_sim
  for (n in seq_len(n_sim)) {
    # draw 510 paired binary observations with the given correlation
    sim_df <- rmvbin(n = 510,
                     margprob = c(0.5, 0.5),
                     bincorr = bincorr)
    # 2x2 contingency table of the simulated pair
    sim_imp_m <- matrix(table(sim_df[, 1], sim_df[, 2]), 2, 2)
    sim_fit <- contingencyTableBF(sim_imp_m, sampleType = "indepMulti",
                                  fixedMargin = "cols")
    sim_cor_results[n + offset, "BayesFactor"] <- extractBF(sim_fit)$bf
    sim_cor_results[n + offset, "trueH"] <- j
    rm(sim_imp_m, sim_fit)
  }
}
# categorize if result is correct, incorrect or inconclusive
# Classify one BF column against a symmetric evidence threshold `thr`:
# BF >= thr is evidence for H1, BF <= 1/thr for H0, anything in between
# is inconclusive; correctness depends on which hypothesis is true.
# (Replaces three nearly identical 6-branch case_when blocks.)
classify_bf <- function(bf, true_h, thr) {
  case_when(
    is.na(bf) ~ NA_character_,
    bf >= thr ~ ifelse(true_h == "H1 is true", "correct", "incorrect"),
    bf <= 1 / thr ~ ifelse(true_h == "H1 is true", "incorrect", "correct"),
    TRUE ~ "inconclusive"
  )
}
sim_cor_results <- sim_cor_results %>%
  mutate(BF3 = classify_bf(BayesFactor, trueH, 3),
         BF5 = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))
# pivot into long data frame for plot
# select the BF columns by name rather than by position (was cols = 3:5),
# so the code does not break if columns are added or reordered
sim_cor_results_l <- pivot_longer(sim_cor_results,
                                  cols = c(BF3, BF5, BF10),
                                  names_to = "BF Threshold",
                                  values_to = "decision")
# order factor for plot
sim_cor_results_l$`BF Threshold` <- factor(sim_cor_results_l$`BF Threshold`, levels = c("BF3", "BF5", "BF10"))
# hrbrthemes::import_roboto_condensed()
# Stacked proportions of correct / incorrect / inconclusive decisions per
# true hypothesis, one facet per BF threshold.
ggplot(sim_cor_results_l, aes(trueH, fill = decision)) +
  geom_bar(position = "fill") +
  # percentage labels; dividing ..count.. by n_sim works because every bar
  # is based on exactly n_sim simulations (..count.. is the pre-ggplot2-3.4
  # spelling of after_stat(count))
  geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
            position =position_stack(vjust = 0.5), stat= "count",
            color = "white", size = 5) +
  coord_flip() +
  facet_wrap(~`BF Threshold`, ncol = 1) +
  labs(title = "Results of the Bayes Factor Design Analysis",
       subtitle = "For three different Bayes Factors",
       caption = paste("In % (rounded), based on", n_sim, "simulations")) +
  xlab("True Hypothesis") +
  scale_fill_viridis_d() +
  theme_ipsum_rc()
Die Ergebnisse zeigen, dass bei einem BF von 3 kaum falsch-positive Ergebnisse zustande kommen (~1%) und die Power jeweils zufriedenstellend bzw. sehr hoch ist. Es ist somit nicht nötig, einen höheren BF zu verwenden, um falsch-positive Ergebnisse zu vermeiden. Höhere BFs hätten zudem den Nachteil, dass vermehrt inkonklusive Ergebnisse auftreten. Wir verwenden bei der Auswertung der binären Zusammenhänge (Kreuztabellen) somit einen Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\).
Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.
Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(d=.35\) an. Diese liegt zwischen einem kleinen (\(d=.2\)) und mittleren (\(d=.5\)) Effekt nach Cohen (1988).
true_hyp <- c("H0 is true", "H1 is true") # the two simulated ground truths
true_d <- c(0, .35) # effect sizes (Cohen's d) under H0 and H1
Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich, verzichteten wir auf eine größere Anzahl an Simulationen.
n_sim <- 1000 # number of simulations per hypothesis
# NOTE(review): no set.seed() before the simulation — results are not
# exactly reproducible; consider adding one.
# Preallocate (2 * n_sim rows) instead of growing the frame in the loop.
sim_ttest_results <- data.frame(BayesFactor = numeric(2 * n_sim),
                                trueH = character(2 * n_sim),
                                stringsAsFactors = FALSE)
### BAIN hypotheses are loop-invariant — define them once, outside.
hypotheses <- "groupX1 = groupX2; groupX1 < groupX2" #H1 and H2 respectively
for(j in true_hyp) { # loop over both ground truths
  # group means under the current truth (d = 0 under H0, d = .35 under H1)
  mu_j <- c(0, if (j == "H0 is true") true_d[1] else true_d[2])
  # rows 1..n_sim hold H0 runs, rows (n_sim+1)..2*n_sim hold H1 runs
  offset <- if (j == "H0 is true") 0 else n_sim
  for (n in seq_len(n_sim)) { # loop over all simulations
    sim_ttest_df <- data.frame(mvrnorm(n = 510, # fixed n of 510
                                       mu = mu_j,
                                       Sigma = matrix(c( 1, .5, # vcov matrix
                                                        .5, 1),
                                                      2, 2)))
    # pivot longer so the two columns become a grouping factor for lm()
    sim_ttest_df_l <- pivot_longer(sim_ttest_df, 1:2, names_to = "group", values_to = "dependentVar")
    ### LINEAR MODEL ############################################ #
    # cell-means model (no intercept): coefficients are the group means
    sim_fit_ttest <- lm(dependentVar ~ group-1,
                        data = sim_ttest_df_l)
    ### BAIN #################################################### #
    bf_ttest <- bain(sim_fit_ttest,
                     hypothesis = hypotheses
                     )
    sim_ttest_results[n + offset, "BayesFactor"] <- bf_ttest$BFmatrix[2,1] # BF(H2,H1)
    sim_ttest_results[n + offset, "trueH"] <- j
    rm(bf_ttest, sim_fit_ttest, sim_ttest_df, sim_ttest_df_l)
  }
}
# categorize if result is correct, incorrect or inconclusive
# Classify one BF column against a symmetric evidence threshold `thr`:
# BF >= thr is evidence for H1, BF <= 1/thr for H0, anything in between
# is inconclusive; correctness depends on which hypothesis is true.
# (Replaces three nearly identical 6-branch case_when blocks.)
classify_bf <- function(bf, true_h, thr) {
  case_when(
    is.na(bf) ~ NA_character_,
    bf >= thr ~ ifelse(true_h == "H1 is true", "correct", "incorrect"),
    bf <= 1 / thr ~ ifelse(true_h == "H1 is true", "incorrect", "correct"),
    TRUE ~ "inconclusive"
  )
}
sim_ttest_results <- sim_ttest_results %>%
  mutate(BF3 = classify_bf(BayesFactor, trueH, 3),
         BF5 = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))
# pivot into long data frame for plot
# select the BF columns by name rather than by position (was cols = 3:5),
# so the code does not break if columns are added or reordered
sim_ttest_results_l <- pivot_longer(sim_ttest_results,
                                    cols = c(BF3, BF5, BF10),
                                    names_to = "BF Threshold",
                                    values_to = "decision")
# order factor for plot
sim_ttest_results_l$`BF Threshold` <- factor(sim_ttest_results_l$`BF Threshold`, levels = c("BF3", "BF5", "BF10"))
# hrbrthemes::import_roboto_condensed()
# Stacked proportions of correct / incorrect / inconclusive decisions per
# true hypothesis, one facet per BF threshold.
ggplot(sim_ttest_results_l, aes(trueH, fill = decision)) +
  geom_bar(position = "fill") +
  # percentage labels; dividing ..count.. by n_sim works because every bar
  # is based on exactly n_sim simulations
  geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
            position =position_stack(vjust = 0.5), stat= "count",
            color = "white", size = 5) +
  coord_flip() +
  facet_wrap(~`BF Threshold`, ncol = 1) +
  labs(title = "Results of the Bayes Factor Design Analysis",
       subtitle = "For three different Bayes Factors",
       caption = paste("In % (rounded), based on", n_sim, "simulations")) +
  xlab("True Hypothesis") +
  scale_fill_viridis_d() +
  theme_ipsum_rc()
Hier zeigen sich bei einem \(N=510\) und einem angenommenen Cohen’s \(d=.35\) ebenfalls (nahezu) keine falsch-positiven Ergebnisse. Somit wird auch hier ein Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\) verwendet.
Deskriptive Daten
pander::panderOptions('digits', 3)
## 45 minute lessons ################ #
# keep only the lessons scheduled for 45 minutes
schulart45 <- p_data %>%
  dplyr::filter(stunde == 45)
# descriptive statistics for these lessons, split by school type;
# reuse schulart45 instead of filtering p_data a second time with the
# identical condition (same rows, less duplication)
descriptives45 <- schulart45 %>%
  dplyr::select(beg_hw_min, dau_hw_min) %>%
  psych::describeBy(group = schulart45$Schulart)
pander::pander(descriptives45)
Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 270 | 33.3 | 13.3 | 39 | 34.7 | 5.93 | 1 |
| dau_hw_min | 2 | 267 | 4.52 | 3.15 | 4 | 4.07 | 1.48 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 83 | 82 | -0.728 | 0.366 | 0.806 |
| dau_hw_min | 25 | 25 | 2.13 | 7.83 | 0.193 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 143 | 38.8 | 8.35 | 42 | 40.5 | 2.97 | 1 |
| dau_hw_min | 2 | 142 | 2.94 | 2.22 | 2 | 2.67 | 1.48 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 49 | 48 | -2.63 | 7.85 | 0.698 |
| dau_hw_min | 15 | 15 | 2.06 | 7 | 0.186 |
# Modus
# mode = location of the peak of a kernel density estimate over the
# observed (non-missing) homework-assignment times
dens_45gs <- schulart45 %>% # keep only measured cases
dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") # filter for Grundschule
dens_45gs <- density(dens_45gs$beg_hw_min) # get density curve
dens_45gy <- schulart45 %>% # keep only measured cases
dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") # filter for Gymnasium
dens_45gy <- density(dens_45gy$beg_hw_min) # get density curve
paste("Modus der Hausaufgabenvergabe in 45min Stunden Grundschule =", round(dens_45gs$x[which.max(dens_45gs$y)], 1))
[1] "Modus der Hausaufgabenvergabe in 45min Stunden Grundschule = 41.1"
paste("Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium =", round(dens_45gy$x[which.max(dens_45gy$y)], 1))
[1] "Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium = 42.9"
## 90 minute lessons ################ #
# keep only the lessons scheduled for 90 minutes
# (comment fixed: the original said "45min" — a copy-paste left-over)
schulart90 <- p_data %>%
  dplyr::filter(stunde == 90)
# descriptive statistics for these lessons, split by school type;
# reuse schulart90 instead of filtering p_data a second time with the
# identical condition (same rows, less duplication)
descriptives90 <- schulart90 %>%
  dplyr::select(beg_hw_min, dau_hw_min) %>%
  psych::describeBy(group = schulart90$Schulart)
pander::pander(descriptives90)
Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | max | |
|---|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 26 | 69.3 | 24.1 | 80 | 73.5 | 7.41 | 5 | 85 |
| dau_hw_min | 2 | 26 | 8.27 | 7.41 | 5.5 | 7.09 | 3.71 | 1 | 35 |
| range | skew | kurtosis | se | |
|---|---|---|---|---|
| beg_hw_min | 80 | -1.73 | 1.69 | 4.73 |
| dau_hw_min | 34 | 2.06 | 4.24 | 1.45 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | max | |
|---|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 42 | 79 | 15.9 | 85 | 82.2 | 5.93 | 29 | 95 |
| dau_hw_min | 2 | 40 | 3.83 | 2.77 | 3 | 3.44 | 2.97 | 1 | 13 |
| range | skew | kurtosis | se | |
|---|---|---|---|---|
| beg_hw_min | 66 | -1.72 | 2.02 | 2.45 |
| dau_hw_min | 12 | 1.17 | 1.26 | 0.438 |
# Modus
# mode = location of the peak of a kernel density estimate over the
# observed (non-missing) homework-assignment times
dens_90gs <- schulart90 %>% # keep only measured cases
dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") # filter for Grundschule
dens_90gs <- density(dens_90gs$beg_hw_min) # get density curve
dens_90gy <- schulart90 %>% # keep only measured cases
dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") # filter for Gymnasium
dens_90gy <- density(dens_90gy$beg_hw_min) # get density curve
paste("Modus der Hausaufgabenvergabe in 90min Stunden Grundschule =", round(dens_90gs$x[which.max(dens_90gs$y)], 1))
[1] "Modus der Hausaufgabenvergabe in 90min Stunden Grundschule = 81.8"
paste("Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium =", round(dens_90gy$x[which.max(dens_90gy$y)], 1))
[1] "Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium = 87.2"
#### IMPUTATION #### #
# separate imputation for the 45-minute lessons only, with the same
# methods, predictor matrix and seed as the full-sample run
p_data_45 <- p_data %>%
  dplyr::filter(stunde==45)
imp45 <- mice(p_data_45,
              maxit = maxit,
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE # FALSE spelled out instead of F
              )
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute the variance of the mean per group by hand ###
# Preallocate one row per imputation — growing a data frame inside the
# loop is O(m^2) for m = 1000.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (constant across imputations: mice only fills in
# missing values, it adds no rows)
samp_gr <- table(mice::complete(imp45)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # fit cell-means model: the coefficients are the group means of beg_hw_min
  fit_lm <- lm(beg_hw_min ~ Schulart-1, data = mice::complete(imp45, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean (VOM) per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the mean estimates over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# H1: equal means; H2: Gymnasium assigns earlier; H3: Gymnasium later
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_45 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr, # size of the groups
              Sigma = variances, # matrices of residual variances of groups
              group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )
print(bf_45)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.000 0.017 1.000 1.000 0.000 0.017 0.001 0.000 0.000
H2 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
H3 1.000 1.000 1.000 0.500 1.000 0.500 333236.770 1.000 0.667
Hu 0.333
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_45$BFmatrix) # pairwise Bayes factors (row hypothesis vs column hypothesis)
H1 H2 H3
H1 1.000000e+00 115.0622 3.452867e-04
H2 8.690949e-03 1.0000 3.000869e-06
H3 2.896144e+03 333236.7735 1.000000e+00
###### DESCRIPTIVE PLOT ############################################### #
## Awesome Raincloud plots, check out source:
# Allen M, Poggiali D, Whitaker K et al. Raincloud plots: a multi-platform tool for
# robust data visualization [version 1; peer review: 2 approved].
# Wellcome Open Res 2019, 4:63. DOI: 10.12688/wellcomeopenres.15191.1
# provides geom_flat_violin()
source("R_rainclouds.R")
# distribution of homework-assignment times in 45-minute lessons, by school type
ggplot(p_data%>%dplyr::filter(stunde == 45), aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
  geom_flat_violin(position = position_nudge(x = 0, y = 0), adjust = 1.6, trim = FALSE, alpha = .3) +
  # vectorised nudge places the two group boxplots side by side
  geom_boxplot(aes(x=""), position = position_nudge(x = c(.49, .54), y = 0),
               outlier.shape = NA, alpha = .5, width = .04, colour = "black") +
  # dashed line marks the scheduled end of the lesson (45 min)
  geom_hline(yintercept = 45, linetype = "dashed", colour = "#696f71", size = 1) +
  scale_colour_brewer(palette = "Set1")+
  scale_fill_brewer(palette = "Set1") +
  scale_y_continuous(expand = c(0, 0), breaks = c(0, 10,20,30,40,45,50,60), limits = c(0, 65)) +
  scale_x_discrete(expand = c(0, 0)) +
  ylab("Minuten seit geplantem Stundenbeginn") +
  xlab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7")) +
  coord_flip()
Basierend auf untersuchten (nicht-fehlenden) Daten.
#### IMPUTATION #### #
# separate imputation for the 90-minute lessons only, with the same
# methods, predictor matrix and seed as the full-sample run
p_data_90 <- p_data %>%
  dplyr::filter(stunde==90)
imp90 <- mice(p_data_90,
              maxit = maxit,
              m = m,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE # FALSE spelled out instead of F
              )
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute the variance of the mean per group by hand ###
# Preallocate one row per imputation — growing a data frame inside the
# loop is O(m^2) for m = 1000.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp90)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: the coefficients are the group means of beg_hw_min
  fit_lm <- lm(beg_hw_min ~ Schulart-1, data = mice::complete(imp90, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean (VOM) per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the mean estimates over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# H1: equal means; H2: Gymnasium assigns earlier; H3: Gymnasium later
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_90 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr, # size of the groups
              Sigma = variances, # matrices of residual variances of groups
              group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )
print(bf_90)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.011 0.010 1.000 1.000 0.011 0.010 1.075 0.350 0.264
H2 1.000 1.000 0.023 0.500 0.023 0.500 0.023 0.015 0.011
H3 1.000 1.000 0.977 0.500 0.977 0.500 43.415 0.636 0.480
Hu 0.245
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_90$BFmatrix) # pairwise Bayes factors (row hypothesis vs column hypothesis)
H1 H2 H3
H1 1.00000000 23.87773 0.54998494
H2 0.04188002 1.00000 0.02303338
H3 1.81823161 43.41525 1.00000000
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud plot of homework-assignment times in 90-minute lessons,
# by school type (geom_flat_violin() from R_rainclouds.R, sourced above)
ggplot(p_data%>%dplyr::filter(stunde == 90),
       aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
  geom_flat_violin(position = position_nudge(x = 0, y = 0),
                   adjust = 1.6, trim = FALSE, alpha = .3) +
  # vectorised nudge places the two group boxplots side by side
  geom_boxplot(aes(x=""), position = position_nudge(x = c(.50, .56), y = 0),
               outlier.shape = NA, alpha = .5, width = .05, colour = "black") +
  # dashed line marks the scheduled end of the lesson (90 min)
  geom_hline(yintercept = 90, linetype = "dashed", colour = "#696f71", size = 1) +
  scale_colour_brewer(palette = "Set1")+
  scale_fill_brewer(palette = "Set1") +
  scale_y_continuous(expand = c(0, 0),
                     breaks = c(0, 10,20,30,40,50,60,70,80,90),
                     limits = c(0,95)) +
  scale_x_discrete(expand = c(0, 0)) +
  ylab("Minuten seit geplantem Stundenbeginn") +
  xlab("% Häufigkeit") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7")) +
  coord_flip()
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute the variance of the mean per group by hand ###
# Preallocate one row per imputation — growing a data frame inside the
# loop is O(m^2) for m = 1000.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp45)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: the coefficients are the group means of dau_hw_min
  fit_lm <- lm(dau_hw_min ~ Schulart-1, data = mice::complete(imp45, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean (VOM) per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the mean estimates over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# H1: equal means; H2: Gymnasium takes less time; H3: Gymnasium more
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_45 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr, # size of the groups
              Sigma = variances, # matrices of residual variances of groups
              group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )
print(bf_45)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.000 0.069 1.000 1.000 0.000 0.069 0.000 0.000 0.000
H2 1.000 1.000 1.000 0.500 1.000 0.500 19180728.204 1.000 0.667
H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
Hu 0.333
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_45$BFmatrix) # pairwise Bayes factors (row hypothesis vs column hypothesis)
H1 H2 H3
H1 1.000000e+00 6.970502e-06 1.336994e+02
H2 1.434617e+05 1.000000e+00 1.918075e+07
H3 7.479464e-03 5.213562e-08 1.000000e+00
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud plot of homework-assignment duration in 45-minute lessons,
# by school type (geom_flat_violin() from R_rainclouds.R, sourced above)
ggplot(p_data%>%dplyr::filter(stunde == 45), aes(x=Schulart, y = dau_hw_min, fill = Schulart)) +
  geom_flat_violin(position = position_nudge(x = .1, y = 0),
                   adjust = 1.6, trim = FALSE, alpha = .3, colour = NA) +
  # jittered raw observations next to the distribution
  geom_point(position = position_jitter(width = .05), size = 1, alpha = 0.5, aes(color=Schulart)) +
  geom_boxplot(position = position_nudge(x = -.15, y = 0),
               outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
  scale_colour_brewer(palette = "Set1")+
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_brewer(palette = "Set1") +
  ylab("Minuten") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute the variance of the mean per group by hand ###
# Preallocate one row per imputation — growing a data frame inside the
# loop is O(m^2) for m = 1000.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (constant across imputations)
samp_gr <- table(mice::complete(imp90)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: the coefficients are the group means of dau_hw_min
  fit_lm <- lm(dau_hw_min ~ Schulart-1, data = mice::complete(imp90, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # variance of the mean (VOM) per group
  vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the mean estimates over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# H1: equal means; H2: Gymnasium takes less time; H3: Gymnasium more
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_90 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr, # size of the groups
              Sigma = variances, # matrices of residual variances of groups
              group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
              )
print(bf_90)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.001 0.039 1.000 1.000 0.001 0.039 0.025 0.012 0.008
H2 1.000 1.000 1.000 0.500 1.000 0.500 2896.832 0.987 0.661
H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
Hu 0.331
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
# Pairwise Bayes factors (90-min lessons): BF of row hypothesis vs. column hypothesis
print(bf_90$BFmatrix)
H1 H2 H3
H1 1.0000000 0.0126602106 36.67451
H2 78.9876273 1.0000000000 2896.83229
H3 0.0272669 0.0003452047 1.00000
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud-style view of homework duration for 90-minute lessons:
# half violin (density), jittered raw points, and an offset boxplot per school type.
ggplot(p_data%>%dplyr::filter(stunde == 90),
aes(x = Schulart, y = dau_hw_min, fill = Schulart)) +
geom_flat_violin(adjust = 1.6, trim = FALSE, alpha = .3, colour = NA,
position = position_nudge(x = .1, y = 0)) +
geom_point(aes(color = Schulart), size = 1, alpha = 0.5,
position = position_jitter(width = .05)) +
geom_boxplot(outlier.shape = NA, alpha = .5, width = .1, colour = "black",
position = position_nudge(x = -.15, y = 0)) +
scale_fill_brewer(palette = "Set1") +
scale_colour_brewer(palette = "Set1") +
scale_y_continuous(expand = c(0, 0)) +
ylab("Minuten") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
Deskriptive Daten
# Descriptive statistics for all observed rating variables, split by school type
descriptives <- psych::describeBy(
dplyr::select(p_data,
lh_ank, lh_auf, lh_nen, lh_sch,
lh_erl, lh_wfr, lh_wno, lh_ano,
lh_bfr, sh_auf, sh_mel, sh_fra, sh_not),
group = p_data$Schulart)
# render as a markdown table with 3 significant digits
panderOptions('digits', 3)
pander::pander(descriptives)
Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | max | |
|---|---|---|---|---|---|---|---|---|---|
| lh_ank | 1 | 322 | 0.925 | 0.263 | 1 | 1 | 0 | 0 | 1 |
| lh_auf | 2 | 320 | 0.809 | 0.393 | 1 | 0.887 | 0 | 0 | 1 |
| lh_nen | 3 | 319 | 0.947 | 0.225 | 1 | 1 | 0 | 0 | 1 |
| lh_sch | 4 | 319 | 0.715 | 0.452 | 1 | 0.767 | 0 | 0 | 1 |
| lh_erl | 5 | 320 | 0.684 | 0.465 | 1 | 0.73 | 0 | 0 | 1 |
| lh_wfr | 6 | 320 | 0.241 | 0.428 | 0 | 0.176 | 0 | 0 | 1 |
| lh_wno | 7 | 322 | 0.696 | 0.461 | 1 | 0.744 | 0 | 0 | 1 |
| lh_ano | 8 | 321 | 0.586 | 0.493 | 1 | 0.607 | 0 | 0 | 1 |
| lh_bfr | 9 | 320 | 0.781 | 1.5 | 0 | 0.48 | 0 | 0 | 15 |
| sh_auf | 10 | 320 | 4.17 | 0.968 | 4 | 4.32 | 1.48 | 1 | 5 |
| sh_mel | 11 | 321 | 0.611 | 1.21 | 0 | 0.319 | 0 | 0 | 9 |
| sh_fra | 12 | 320 | 0.844 | 1.65 | 0 | 0.492 | 0 | 0 | 15 |
| sh_not | 13 | 321 | 71.2 | 38.8 | 90 | 76.5 | 14.8 | 0 | 100 |
| range | skew | kurtosis | se | |
|---|---|---|---|---|
| lh_ank | 1 | -3.22 | 8.43 | 0.0147 |
| lh_auf | 1 | -1.57 | 0.46 | 0.022 |
| lh_nen | 1 | -3.96 | 13.7 | 0.0126 |
| lh_sch | 1 | -0.947 | -1.11 | 0.0253 |
| lh_erl | 1 | -0.79 | -1.38 | 0.026 |
| lh_wfr | 1 | 1.21 | -0.543 | 0.0239 |
| lh_wno | 1 | -0.846 | -1.29 | 0.0257 |
| lh_ano | 1 | -0.346 | -1.89 | 0.0275 |
| lh_bfr | 15 | 4.39 | 31.3 | 0.0838 |
| sh_auf | 4 | -1.23 | 1.29 | 0.0541 |
| sh_mel | 9 | 2.67 | 9.3 | 0.0674 |
| sh_fra | 15 | 3.86 | 22.8 | 0.0921 |
| sh_not | 100 | -1.05 | -0.59 | 2.16 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | max | |
|---|---|---|---|---|---|---|---|---|---|
| lh_ank | 1 | 183 | 0.77 | 0.422 | 1 | 0.837 | 0 | 0 | 1 |
| lh_auf | 2 | 181 | 0.591 | 0.493 | 1 | 0.614 | 0 | 0 | 1 |
| lh_nen | 3 | 123 | 0.967 | 0.178 | 1 | 1 | 0 | 0 | 1 |
| lh_sch | 4 | 183 | 0.628 | 0.485 | 1 | 0.66 | 0 | 0 | 1 |
| lh_erl | 5 | 183 | 0.65 | 0.478 | 1 | 0.687 | 0 | 0 | 1 |
| lh_wfr | 6 | 184 | 0.207 | 0.406 | 0 | 0.135 | 0 | 0 | 1 |
| lh_wno | 7 | 184 | 0.321 | 0.468 | 0 | 0.277 | 0 | 0 | 1 |
| lh_ano | 8 | 179 | 0.196 | 0.398 | 0 | 0.124 | 0 | 0 | 1 |
| lh_bfr | 9 | 121 | 0.661 | 1.03 | 0 | 0.443 | 0 | 0 | 6 |
| sh_auf | 10 | 108 | 3.81 | 1.03 | 4 | 3.9 | 1.48 | 1 | 5 |
| sh_mel | 11 | 123 | 0.602 | 0.981 | 0 | 0.394 | 0 | 0 | 6 |
| sh_fra | 12 | 183 | 0.705 | 1.22 | 0 | 0.429 | 0 | 0 | 7 |
| sh_not | 13 | 171 | 59.9 | 37.7 | 70 | 62.3 | 44.5 | 0 | 100 |
| range | skew | kurtosis | se | |
|---|---|---|---|---|
| lh_ank | 1 | -1.28 | -0.374 | 0.0312 |
| lh_auf | 1 | -0.368 | -1.87 | 0.0366 |
| lh_nen | 1 | -5.21 | 25.3 | 0.0161 |
| lh_sch | 1 | -0.527 | -1.73 | 0.0358 |
| lh_erl | 1 | -0.625 | -1.62 | 0.0353 |
| lh_wfr | 1 | 1.44 | 0.0687 | 0.0299 |
| lh_wno | 1 | 0.762 | -1.43 | 0.0345 |
| lh_ano | 1 | 1.52 | 0.32 | 0.0297 |
| lh_bfr | 6 | 2.43 | 7.66 | 0.0936 |
| sh_auf | 4 | -0.435 | -0.793 | 0.0995 |
| sh_mel | 6 | 2.35 | 7.43 | 0.0885 |
| sh_fra | 7 | 2.57 | 7.71 | 0.09 |
| sh_not | 100 | -0.443 | -1.35 | 2.88 |
# For each imputed data set: contingency-table BF (lh_ank x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
cor_results <- data.frame()
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_ank,  # 2x2 contingency table: feature (0/1) x school type
comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",
fixedMargin = "cols") # GS and GY as fixed
cor_results[i,"lh_ank-Schulart"] <- extractBF(fit)$bf # save BF in data frame
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
# show summary of BFs
summary(cor_results[,"lh_ank-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1730 9621 9621 9294 9621 38702
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_ank-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_ank-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1430 0.1520 0.1533 0.1532 0.1542 0.1639
# For each imputed data set: BF for the association of lh_ank with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_ank, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_ank-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_ank-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
2093 18554 19939 17798 20601 27261
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_ank-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_ank-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# plot observed (non-missing) values as descriptives: share of lessons with
# lh_ank per grade level; point size = number of lessons behind each share
lh_ank_p <- p_data %>%
dplyr::filter(!is.na(lh_ank) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_ank = mean(lh_ank, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_ank_p, aes(x = Klassenstufe, y = lh_ank, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_ank*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# For each imputed data set: contingency-table BF (lh_auf x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_auf, comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_auf-Schulart"] <- extractBF(fit)$bf
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
summary(cor_results[,"lh_auf-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
11132 71353 85423 131640 159914 774708
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_auf-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_auf-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1999 0.2129 0.2166 0.2169 0.2208 0.2352
# For each imputed data set: BF for the association of lh_auf with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_auf, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_auf-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_auf-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
88811 596357 1085965 1381383 1685379 6867900
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_auf-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_auf-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Observed (non-missing) share of lessons with lh_auf per grade level;
# point size = number of lessons behind each share
lh_auf_p <- p_data %>%
dplyr::filter(!is.na(lh_auf) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_auf = mean(lh_auf, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_auf_p, aes(x = Klassenstufe, y = lh_auf, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_auf*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# For each imputed data set: contingency-table BF (lh_nen x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_nen, comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_nen-Schulart"] <- extractBF(fit)$bf
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
summary(cor_results[,"lh_nen-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.05133 0.05412 0.06194 0.08576 0.07829 3.59638
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_nen-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_nen-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.033835 -0.012622 -0.003918 -0.001079 0.009004 0.071875
# For each imputed data set: BF for the association of lh_nen with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_nen, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_nen-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_nen-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1036 0.1111 0.1391 0.8039 0.2199 504.1839
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_nen-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_nen-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Observed (non-missing) share of lessons with lh_nen per grade level;
# point size = number of lessons behind each share
lh_nen_p <- p_data %>%
dplyr::filter(!is.na(lh_nen) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_nen = mean(lh_nen, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_nen_p, aes(x = Klassenstufe, y = lh_nen, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_nen*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# For each imputed data set: contingency-table BF (lh_sch x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_sch, comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_sch-Schulart"] <- extractBF(fit)$bf
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
summary(cor_results[,"lh_sch-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.4367 0.6171 0.7092 0.7208 0.8198 1.4633
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_sch-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_sch-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.06973 0.07983 0.08320 0.08339 0.08661 0.09953
# For each imputed data set: BF for the association of lh_sch with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_sch, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_sch-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_sch-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1986 0.2669 0.2969 0.3188 0.3514 0.6692
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_sch-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_sch-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Observed (non-missing) share of lessons with lh_sch per grade level;
# point size = number of lessons behind each share
lh_sch_p <- p_data %>%
dplyr::filter(!is.na(lh_sch) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_sch = mean(lh_sch, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_sch_p, aes(x = Klassenstufe, y = lh_sch, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_sch*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# For each imputed data set: contingency-table BF (lh_erl x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_erl, comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_erl-Schulart"] <- extractBF(fit)$bf
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
summary(cor_results[,"lh_erl-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1248 0.1411 0.1488 0.1512 0.1578 0.1906
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_erl-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_erl-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.02237 0.03208 0.03535 0.03530 0.03829 0.04922
# For each imputed data set: BF for the association of lh_erl with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_erl, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_erl-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_erl-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1253 0.1460 0.1560 0.1595 0.1697 0.2328
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_erl-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_erl-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Observed (non-missing) share of lessons with lh_erl per grade level;
# point size = number of lessons behind each share
lh_erl_p <- p_data %>%
dplyr::filter(!is.na(lh_erl) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_erl = mean(lh_erl, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_erl_p, aes(x = Klassenstufe, y = lh_erl, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_erl*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# For each imputed data set: contingency-table BF (lh_wfr x Schulart) plus the
# posterior difference P(feature | GS) - P(feature | GY)
for (i in seq_len(m)) {               # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)      # complete the data once per iteration (was completed twice)
imp_m <- matrix(table(comp_i$lh_wfr, comp_i$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_wfr-Schulart"] <- extractBF(fit)$bf
chains <- posterior(fit, iterations = 1000) # posterior draws of the cell probabilities
showMerkm_givenGS <- chains[,"pi[2,1]"] / chains[,"pi[*,1]"] # P(feature shown | GS)
showMerkm_givenGY <- chains[,"pi[2,2]"] / chains[,"pi[*,2]"] # P(feature shown | GY)
cor_results[i,"prob_GSminusGY"] <- mean(mcmc(showMerkm_givenGS - showMerkm_givenGY)) # mean posterior difference
rm(comp_i, imp_m, fit, chains)
}
summary(cor_results[,"lh_wfr-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1198 0.1420 0.1530 0.1509 0.1530 0.1976
# Distribution of the BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_wfr-Schulart`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_wfr-Schulart`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
# summarize the posterior difference P(feature | GS) - P(feature | GY) across imputations
summary(cor_results[,"prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.02354 0.03273 0.03522 0.03525 0.03765 0.04722
# For each imputed data set: BF for the association of lh_wfr with grade level
kl_results <- data.frame()
for (i in seq_len(m)) {             # seq_len is safe even if m were 0
comp_i <- mice::complete(imp, i)    # complete the data once per iteration (was completed twice)
fit <- correlationBF(y = comp_i$lh_wfr, # correlation
x = comp_i$Klassenstufe)
kl_results[i,"lh_wfr-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(comp_i, fit)
}
# show summary of BFs
summary(kl_results[,"lh_wfr-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1036 0.1076 0.1102 0.1106 0.1132 0.1267
# Distribution of the grade-level BFs over the m imputations on a log10 axis;
# dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_wfr-Klassenstufe`)) +
geom_density(fill = "#b4a069", color = NA, alpha = .3) +
geom_jitter(aes(x = `lh_wfr-Klassenstufe`, y=""),
width = 0, height = 1,
size = 2.5, alpha = .3,
color = "#b4a069", fill = "#b4a069") +
geom_vline(xintercept = (1/3), linetype = "dashed",
color = "red", size = 2, alpha = .4) +
geom_vline(xintercept = 3, linetype = "dashed",
color = "red", size = 2, alpha = .4) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_continuous(trans = 'log10', expand = c(0, 0)) +
labs(x = "BF [log10]", y = "% Häufigkeit") +
theme_light() +
theme(panel.background = element_rect(fill = "#f6f7f7"),
panel.border = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Observed (non-missing) share of lessons with lh_wfr per grade level;
# point size = number of lessons behind each share
lh_wfr_p <- p_data %>%
dplyr::filter(!is.na(lh_wfr) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
summarize(lh_wfr = mean(lh_wfr, na.rm = TRUE),
length_n = n()) %>%   # n() replaces the mutate(length)/mean(length) detour
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
# grades 1-4 (incl. the mixed class "1_2") are Grundschule, all higher grades Gymnasium
Schulart = as.factor(if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
"Grundschule", "Gymnasium")))
ggplot(lh_wfr_p, aes(x = Klassenstufe, y = lh_wfr, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
# vjust is a fixed adjustment, not a data mapping -> set it outside aes()
geom_text(aes(label = paste(round(lh_wfr*100,0), "%")), vjust = 3) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
# shaded backgrounds: Grundschule (pink) vs. Gymnasium (blue) grade ranges
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
# BF for the lh_wno x Schulart contingency table, per imputation, plus the
# posterior difference of the conditional probabilities (GS minus GY)
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  tab <- matrix(table(completed$lh_wno, completed$Schulart), 2, 2)
  fit <- contingencyTableBF(tab, sampleType = "indepMulti", fixedMargin = "cols")
  cor_results[i, "lh_wno-Schulart"] <- extractBF(fit)$bf
  post <- posterior(fit, iterations = 1000) # posterior draws of cell probabilities
  p_gs <- post[, "pi[2,1]"] / post[, "pi[*,1]"] # P(Merkmal | Grundschule)
  p_gy <- post[, "pi[2,2]"] / post[, "pi[*,2]"] # P(Merkmal | Gymnasium)
  cor_results[i, "prob_GSminusGY"] <- mean(mcmc(p_gs - p_gy)) # mean posterior difference
  rm(completed, tab, fit, post)
}
summary(cor_results[, "lh_wno-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.754e+13 5.581e+13 8.324e+13 9.702e+13 1.502e+14 1.502e+14
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_wno-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_wno-Schulart`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
# pooled posterior difference P(Merkmal | GS) - P(Merkmal | GY)
summary(cor_results[, "prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.3622 0.3714 0.3738 0.3734 0.3760 0.3803
# Bayes factor for the correlation of lh_wno with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$lh_wno, x = completed$Klassenstufe)
  kl_results[i, "lh_wno-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "lh_wno-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.011e+12 9.610e+12 2.044e+13 2.122e+13 3.116e+13 5.261e+13
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_wno-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_wno-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of lh_wno plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
lh_wno_p <- p_data %>%
  dplyr::filter(!is.na(lh_wno) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_wno = mean(lh_wno, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean relative frequency of lh_wno; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(lh_wno_p, aes(x = Klassenstufe, y = lh_wno, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(lh_wno * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
# BF for the lh_ano x Schulart contingency table, per imputation, plus the
# posterior difference of the conditional probabilities (GS minus GY)
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  tab <- matrix(table(completed$lh_ano, completed$Schulart), 2, 2)
  fit <- contingencyTableBF(tab, sampleType = "indepMulti", fixedMargin = "cols")
  cor_results[i, "lh_ano-Schulart"] <- extractBF(fit)$bf
  post <- posterior(fit, iterations = 1000) # posterior draws of cell probabilities
  p_gs <- post[, "pi[2,1]"] / post[, "pi[*,1]"] # P(Merkmal | Grundschule)
  p_gy <- post[, "pi[2,2]"] / post[, "pi[*,2]"] # P(Merkmal | Gymnasium)
  cor_results[i, "prob_GSminusGY"] <- mean(mcmc(p_gs - p_gy)) # mean posterior difference
  rm(completed, tab, fit, post)
}
summary(cor_results[, "lh_ano-Schulart"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.479e+14 4.433e+15 1.396e+16 1.711e+16 2.613e+16 4.685e+16
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_ano-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_ano-Schulart`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
# pooled posterior difference P(Merkmal | GS) - P(Merkmal | GY)
summary(cor_results[, "prob_GSminusGY"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.3720 0.3873 0.3912 0.3907 0.3945 0.4010
# Bayes factor for the correlation of lh_ano with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$lh_ano, x = completed$Klassenstufe)
  kl_results[i, "lh_ano-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "lh_ano-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.252e+13 2.947e+14 6.727e+14 9.168e+14 1.349e+15 5.043e+15
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_ano-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_ano-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of lh_ano plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
lh_ano_p <- p_data %>%
  dplyr::filter(!is.na(lh_ano) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_ano = mean(lh_ano, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean relative frequency of lh_ano; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(lh_ano_p, aes(x = Klassenstufe, y = lh_ano, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(lh_ano * 100, 0), "%")), vjust = 3) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("% Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute group means and their sampling variances by hand ###
# collectors, one row per imputation (numeric(0) is the idiomatic empty column)
vcov_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
mean_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (no intercept) -> one estimated mean per Schulart
  fit_lm <- lm(lh_bfr ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance (idiomatic ^, not **)
  # variance of the mean per group = residual variance / group size
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool over imputations: average the variances of the means ...
vcov_df <- vcov_df %>%
  summarize_all(mean)
# ... and pack them as 1x1 matrices, one per group, as bain expects
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool the mean estimates the same way
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# equality vs. the two directional hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # sampling variance of each group mean
               group_parameters = 1, # one group-specific parameter (the mean)
               joint_parameters = 0  # no parameters shared across the groups
)
print(bf_hyp)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 2.523 0.145 1.000 1.000 2.523 0.145 17.345 0.897 0.853
H2 1.000 1.000 0.749 0.500 0.749 0.500 2.979 0.077 0.074
H3 1.000 1.000 0.251 0.500 0.251 0.500 0.336 0.026 0.025
Hu 0.049
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_hyp$BFmatrix) # pairwise BFs: row hypothesis relative to column hypothesis
H1 H2 H3
H1 1.00000000 11.5835903 34.510626
H2 0.08632902 1.0000000 2.979269
H3 0.02897658 0.3356529 1.000000
# Bayes factor for the correlation of lh_bfr with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$lh_bfr, x = completed$Klassenstufe)
  kl_results[i, "lh_bfr-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "lh_bfr-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1036 0.1293 0.1512 0.1561 0.1770 0.3039
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_bfr-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_bfr-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of lh_bfr plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
lh_bfr_p <- p_data %>%
  dplyr::filter(!is.na(lh_bfr) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_bfr = mean(lh_bfr, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean absolute frequency of lh_bfr; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(lh_bfr_p, aes(x = Klassenstufe, y = lh_bfr, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(lh_bfr, 2)), vjust = 3) +
  scale_y_continuous(limits = c(0, (max(lh_bfr_p$lh_bfr) * 1.1)), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute group means and their sampling variances by hand ###
# collectors, one row per imputation (numeric(0) is the idiomatic empty column)
vcov_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
mean_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (no intercept) -> one estimated mean per Schulart
  fit_lm <- lm(sh_auf ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance (idiomatic ^, not **)
  # variance of the mean per group = residual variance / group size
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool over imputations: average the variances of the means ...
vcov_df <- vcov_df %>%
  summarize_all(mean)
# ... and pack them as 1x1 matrices, one per group, as bain expects
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool the mean estimates the same way
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# equality vs. the two directional hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # sampling variance of each group mean
               group_parameters = 1, # one group-specific parameter (the mean)
               joint_parameters = 0  # no parameters shared across the groups
)
print(bf_hyp)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.000 0.199 1.000 1.000 0.000 0.199 0.001 0.001 0.000
H2 1.000 1.000 1.000 0.500 1.000 0.500 176990.839 0.999 0.666
H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
Hu 0.333
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_hyp$BFmatrix) # pairwise BFs: row hypothesis relative to column hypothesis
H1 H2 H3
H1 1.000000e+00 7.07254e-04 125.1775
H2 1.413919e+03 1.00000e+00 176990.8373
H3 7.988658e-03 5.65001e-06 1.0000
# Bayes factor for the correlation of sh_auf with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$sh_auf, x = completed$Klassenstufe)
  kl_results[i, "sh_auf-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "sh_auf-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000e+00 1.440e+03 1.569e+04 1.176e+08 1.690e+05 6.877e+10
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `sh_auf-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_auf-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of sh_auf plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
sh_auf_p <- p_data %>%
  dplyr::filter(!is.na(sh_auf) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_auf = mean(sh_auf, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean Likert agreement for sh_auf; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(sh_auf_p, aes(x = Klassenstufe, y = sh_auf, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_auf, 1)), vjust = -3) +
  scale_y_continuous(limits = c(1, 5), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("Zustimmung (Likert Item)") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute group means and their sampling variances by hand ###
# collectors, one row per imputation (numeric(0) is the idiomatic empty column)
vcov_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
mean_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (no intercept) -> one estimated mean per Schulart
  fit_lm <- lm(sh_mel ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance (idiomatic ^, not **)
  # variance of the mean per group = residual variance / group size
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool over imputations: average the variances of the means ...
vcov_df <- vcov_df %>%
  summarize_all(mean)
# ... and pack them as 1x1 matrices, one per group, as bain expects
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool the mean estimates the same way
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# equality vs. the two directional hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # sampling variance of each group mean
               group_parameters = 1, # one group-specific parameter (the mean)
               joint_parameters = 0  # no parameters shared across the groups
)
print(bf_hyp)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 3.695 0.170 1.000 1.000 3.695 0.170 21.714 0.916 0.879
H2 1.000 1.000 0.504 0.500 0.504 0.500 1.018 0.043 0.041
H3 1.000 1.000 0.496 0.500 0.496 0.500 0.983 0.042 0.040
Hu 0.040
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_hyp$BFmatrix) # pairwise BFs: row hypothesis relative to column hypothesis
H1 H2 H3
H1 1.00000000 21.5251188 21.906863
H2 0.04645735 1.0000000 1.017735
H3 0.04564779 0.9825742 1.000000
# Bayes factor for the correlation of sh_mel with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$sh_mel, x = completed$Klassenstufe)
  kl_results[i, "sh_mel-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "sh_mel-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1036 0.1049 0.1091 0.1192 0.1209 0.6346
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `sh_mel-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_mel-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of sh_mel plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
sh_mel_p <- p_data %>%
  dplyr::filter(!is.na(sh_mel) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_mel = mean(sh_mel, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean absolute frequency of sh_mel; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(sh_mel_p, aes(x = Klassenstufe, y = sh_mel, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_mel, 2)), vjust = 3) +
  scale_y_continuous(limits = c(0, (max(sh_mel_p$sh_mel) * 1.1)), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute group means and their sampling variances by hand ###
# collectors, one row per imputation (numeric(0) is the idiomatic empty column)
vcov_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
mean_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (no intercept) -> one estimated mean per Schulart
  fit_lm <- lm(sh_fra ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance (idiomatic ^, not **)
  # variance of the mean per group = residual variance / group size
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool over imputations: average the variances of the means ...
vcov_df <- vcov_df %>%
  summarize_all(mean)
# ... and pack them as 1x1 matrices, one per group, as bain expects
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool the mean estimates the same way
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# equality vs. the two directional hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # sampling variance of each group mean
               group_parameters = 1, # one group-specific parameter (the mean)
               joint_parameters = 0  # no parameters shared across the groups
)
print(bf_hyp)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 1.716 0.133 1.000 1.000 1.716 0.133 12.940 0.866 0.812
H2 1.000 1.000 0.846 0.500 0.846 0.500 5.475 0.113 0.106
H3 1.000 1.000 0.154 0.500 0.154 0.500 0.183 0.021 0.019
Hu 0.063
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_hyp$BFmatrix) # pairwise BFs: row hypothesis relative to column hypothesis
H1 H2 H3
H1 1.00000000 7.6514875 41.892639
H2 0.13069354 1.0000000 5.475097
H3 0.02387054 0.1826452 1.000000
# Bayes factor for the correlation of sh_fra with Klassenstufe, per imputation
kl_results <- data.frame()
for (i in seq_len(m)) {
  completed <- mice::complete(imp, i)
  fit <- correlationBF(y = completed$sh_fra, x = completed$Klassenstufe)
  kl_results[i, "sh_fra-Klassenstufe"] <- extractBF(fit)$bf # store the BF
  rm(completed, fit)
}
# summary of the BFs over the m imputations
summary(kl_results[, "sh_fra-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1104 0.1962 0.2105 0.2105 0.2251 0.3108
# Distribution of the Bayes factors over the m imputations (log10 axis).
# Dashed red lines mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `sh_fra-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `sh_fra-Klassenstufe`, y = ""),
              width = 0, height = 1,
              size = 2.5, alpha = .3,
              color = "#b4a069", fill = "#b4a069") +
  geom_vline(xintercept = 1 / 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  geom_vline(xintercept = 3, linetype = "dashed",
             size = 2, color = "red", alpha = .4) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  xlab("BF [log10]") +
  ylab("% Häufigkeit") +
  theme_light() +
  theme(panel.background = element_rect(fill = "#f6f7f7"),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "#696f71"))
###### DESCRIPTIVE PLOT ############################################### #
# Per-grade mean of sh_fra plus the number of observed lessons per grade.
# FIX (idiom, same result): mutate(length = length(.)) + mean(length) is just
# n(); the five identical case_when branches collapse to a single %in% test.
sh_fra_p <- p_data %>%
  dplyr::filter(!is.na(sh_fra) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_fra = mean(sh_fra, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Per-grade mean absolute frequency of sh_fra; point size = number of lessons.
# FIX: background bands were geom_rect(aes(...)) layers, which inherit the plot
# data and paint one rectangle per data row ON TOP of the points (alpha stacks
# with the row count). annotate() draws each band once, beneath the data.
ggplot(sh_fra_p, aes(x = Klassenstufe, y = sh_fra, color = Schulart)) +
  annotate("rect", xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf,
           fill = "pink", alpha = .1) +
  annotate("rect", xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf,
           fill = "#BFEFFF", alpha = .1) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = round(sh_fra, 2)), vjust = 3) +
  scale_y_continuous(limits = c(0, (max(sh_fra_p$sh_fra) * 1.1)), expand = c(0, 0)) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute group means and their sampling variances by hand ###
# collectors, one row per imputation (numeric(0) is the idiomatic empty column)
vcov_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
mean_df <- data.frame(Grundschule = numeric(0),
                      Gymnasium = numeric(0))
# sample size per group
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (no intercept) -> one estimated mean per Schulart
  fit_lm <- lm(sh_not ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance (idiomatic ^, not **)
  # variance of the mean per group = residual variance / group size
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool over imputations: average the variances of the means ...
vcov_df <- vcov_df %>%
  summarize_all(mean)
# ... and pack them as 1x1 matrices, one per group, as bain expects
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool the mean estimates the same way
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# equality vs. the two directional hypotheses
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # sampling variance of each group mean
               group_parameters = 1, # one group-specific parameter (the mean)
               joint_parameters = 0  # no parameters shared across the groups
)
print(bf_hyp)
Bayesian informative hypothesis testing for an object of class numeric:
Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
H1 0.000 0.005 1.000 1.000 0.000 0.005 0.068 0.033 0.022
H2 1.000 1.000 1.000 0.500 1.000 0.500 2905.398 0.967 0.652
H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
Hu 0.326
Hypotheses:
H1: Gym=Grund
H2: Gym<Grund
H3: Gym>Grund
Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
print(bf_hyp$BFmatrix) # pairwise BFs: row hypothesis relative to column hypothesis
H1 H2 H3
H1 1.00000000 0.0342085490 99.38945
H2 29.23245880 1.0000000000 2905.39808
H3 0.01006143 0.0003441869 1.00000
# Compute the Bayes factor of the sh_not-Klassenstufe correlation in every
# imputed data set, so the spread of BFs across imputations can be inspected.
# Preallocate the result column instead of growing the data frame in the loop.
kl_results <- data.frame("sh_not-Klassenstufe" = numeric(m),
                         check.names = FALSE)
for (i in seq_len(m)) {
  # Bayesian test of the correlation between sh_not and grade level
  fit <- correlationBF(y = mice::complete(imp, i)$sh_not,
                       x = mice::complete(imp, i)$Klassenstufe)
  kl_results[i, "sh_not-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit)
}
# show summary of BFs
summary(kl_results[, "sh_not-Klassenstufe"])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.362 5.455 8.044 9.964 12.732 44.463
# Plot the distribution of Bayes factors across imputations: a density curve
# with the individual BFs jittered along the x-axis and the conventional
# 1/3 and 3 evidence thresholds marked as dashed red lines (log10 x-axis).
plot_theme <- theme(axis.line = element_line(colour = "#696f71"),
                    panel.grid.major = element_blank(),
                    panel.grid.minor = element_blank(),
                    panel.border = element_blank(),
                    panel.background = element_rect(fill = "#f6f7f7"))
bf_plot <- ggplot(kl_results, aes(x = `sh_not-Klassenstufe`))
bf_plot <- bf_plot +
  geom_density(alpha = .3, fill = "#b4a069", color = NA) +
  geom_jitter(aes(x = `sh_not-Klassenstufe`, y = ""),
              height = 1, width = 0, alpha = .3,
              size = 2.5, fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = (1/3), color = "red", alpha = .4,
             size = 2, linetype = "dashed") +
  geom_vline(xintercept = 3, color = "red", alpha = .4,
             size = 2, linetype = "dashed") +
  scale_x_continuous(expand = c(0, 0), trans = 'log10') +
  scale_y_discrete(expand = c(0, 0)) +
  ylab("% Häufigkeit") +
  xlab("BF [log10]") +
  theme_light() +
  plot_theme
print(bf_plot)
###### DESCRIPTIVE PLOT ############################################### #
# Mean sh_not per Klassenstufe, plus the number of contributing observations
# per grade (used as point size). Grades 1 to 4 (incl. the mixed "1_2" class)
# are labelled Grundschule, everything else Gymnasium.
sh_not_p <- p_data %>%
  dplyr::filter(!is.na(sh_not) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  # n() replaces the former mutate(length = length(sh_not)) +
  # summarize(mean(length)) detour — the group size is a constant per group.
  summarize(sh_not = mean(sh_not, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
ggplot(sh_not_p, aes(x = Klassenstufe, y = sh_not, color = Schulart)) +
  geom_line(aes(group = NA), color = "grey", size = 1) +
  geom_point(aes(size = length_n)) +
  geom_text(aes(label = paste(round(sh_not, 0), "%"), vjust = 3)) +
  scale_y_continuous(limits = c(0, (max(sh_not_p$sh_not) * 1.1)), expand = c(0, 0)) +
  # NOTE(review): constants inside aes() draw one rectangle per data row, so
  # the tiny alpha accumulates into the visible background shading.
  geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
  geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
  xlab("Klassenstufe") +
  ylab("∅ absolute Häufigkeit") +
  labs(size = "Anzahl\neingegangener\nStunden") +
  theme_light() +
  theme(axis.line = element_line(colour = "#696f71"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_rect(fill = "#f6f7f7"))
Bitte mit der Maus über die einzelnen Punkte fahren, um zu erfahren, um welche Korrelation es sich handelt.
# Split the data by school type and impute each subset separately, so that
# correlations can later be pooled and compared between the two groups.
p_dataGS <- dplyr::filter(p_data, Schulart == "Grundschule")
p_dataGY <- dplyr::filter(p_data, Schulart == "Gymnasium")
# Same imputation setup (m, maxit, method, predictor matrix, seed) as before.
impGS <- mice(p_dataGS,
              m = m,
              maxit = maxit,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE)
impGY <- mice(p_dataGY,
              m = m,
              maxit = maxit,
              meth = meth,
              pred = pred,
              seed = 666,
              printFlag = FALSE)
# Pool the pairwise correlations of the lesson/homework indicators within
# each school type. The variable list is defined once instead of twice.
cor_vars <- c("lh_ank", "lh_auf", "lh_nen",
              "lh_sch", "lh_erl", "lh_wfr",
              "lh_wno", "lh_ano", "lh_bfr",
              "sh_auf", "sh_mel", "sh_fra",
              "sh_not")
corGS <- miceadds::micombine.cor(impGS, variables = cor_vars)
corGY <- miceadds::micombine.cor(impGY, variables = cor_vars)
# micombine.cor lists every pair in both orders; keep each unordered pair
# once. choose(13, 2) = 78 replaces the former hard-coded row count.
n_pairs <- choose(length(cor_vars), 2)
corGS <- corGS[seq_len(n_pairs), ] %>%
  mutate(Korrelation = paste(variable1, variable2, sep = "-"),
         rGS = round(r, digits = 3)) %>%
  dplyr::select(Korrelation, rGS)
corGY <- corGY[seq_len(n_pairs), ] %>%
  mutate(Korrelation = paste(variable1, variable2, sep = "-"),
         rGY = round(r, digits = 3)) %>%
  dplyr::select(Korrelation, rGY)
# one row per pair, correlations of both school types side by side
corGSGY <- full_join(corGS, corGY, by = "Korrelation")
# library(ggrepel)
# ggplot(corGSGY, aes(x=rGS, y=rGY, color = Korrelation, label = Korrelation)) +
# geom_point() +
# scale_x_continuous(limits = c(0,1)) +
# scale_y_continuous(limits = c(0,1)) +
# geom_abline(intercept = 0, slope = 1) +
# geom_label_repel() +
# theme(legend.position = "none")
library(plotly)
# Shared axis definition for both axes: correlations range from -.4 to 1.
axis_def <- list(range = c(-.4, 1),
dtick = 0.25,
automargin = TRUE)
# Interactive scatter of the pooled correlations: r in Grundschule (x) vs.
# r in Gymnasium (y). Two translucent background triangles indicate which
# school type shows the larger correlation; hovering a point reveals the
# variable pair and both coefficients.
plot_ly() %>%
# Upper-left triangle (above the diagonal): correlation larger in Gymnasium.
add_trace(
x = c(-.4,-.4,1),
y = c(-.4, 1, 1),
name = "Korrelation in GY größer",
type = 'scatter',
fill = 'toself',
fillcolor = '#BFEFFF',
opacity = .4,
hoveron = 'points',
# markers fully transparent — only the filled area should be visible
marker = list(
color = '#BFEFFF',
opacity = 0),
line = list(
color = '#BFEFFF'),
text = "",
hoverinfo = ''
) %>%
# Lower-right triangle (below the diagonal): correlation larger in Grundschule.
add_trace(
x = c(-.4, 1,1),
y = c(-.4,-.4, 1),
name = "Korrelation in GS größer",
type = 'scatter',
fill = 'toself',
fillcolor = 'pink',
opacity = .4,
hoveron = 'points',
marker = list(
color = 'pink',
opacity = 0),
line = list(
color = 'pink'),
text = "",
hoverinfo = ''
) %>%
# The actual data points: one marker per variable pair from corGSGY.
add_trace(
data = corGSGY,
x = ~rGS,
y = ~rGY,
type = "scatter",
mode = "markers",
marker = list(
color = "darkgrey",
opacity = .85
),
name = "Korrelationskoeffizient in GY und GS",
# hover tooltip with the pair name and both correlation coefficients
text = ~paste("Korrelation: ", Korrelation,
'<br>r<sub>GY</sub>= ', rGY,
'<br>r<sub>GS</sub>= ', rGS)) %>%
# square 800x800 canvas with a horizontal legend above the plot
layout(
xaxis = axis_def,
yaxis = axis_def,
autosize = F,
width = 800,
height = 800,
legend = list(orientation = 'h',
x = 0,
y = 1))
# markdown table of the pooled correlations per school type
pander(corGSGY)
| Korrelation | rGS | rGY |
|---|---|---|
| lh_ank-lh_auf | 0.192 | 0.178 |
| lh_ank-lh_nen | -0.013 | 0.001 |
| lh_ank-lh_sch | 0.111 | 0.222 |
| lh_ank-lh_erl | 0.149 | 0.185 |
| lh_ank-lh_wfr | 0.102 | 0.114 |
| lh_ank-lh_bfr | 0.061 | 0.008 |
| lh_ank-lh_wno | 0.181 | 0.107 |
| lh_ank-lh_ano | 0.133 | 0.126 |
| lh_ank-sh_auf | 0.234 | 0.244 |
| lh_ank-sh_mel | 0.028 | 0.054 |
| lh_ank-sh_fra | 0.052 | 0.023 |
| lh_ank-sh_not | 0.158 | 0.15 |
| lh_auf-lh_nen | 0.113 | 0.161 |
| lh_auf-lh_sch | 0.045 | 0.111 |
| lh_auf-lh_erl | 0.144 | 0.154 |
| lh_auf-lh_wfr | 0.143 | 0.176 |
| lh_auf-lh_bfr | 0.004 | 0.056 |
| lh_auf-lh_wno | 0.12 | 0.136 |
| lh_auf-lh_ano | 0.116 | 0.17 |
| lh_auf-sh_auf | 0.281 | 0.093 |
| lh_auf-sh_mel | -0.031 | 0.04 |
| lh_auf-sh_fra | -0.031 | 0.045 |
| lh_auf-sh_not | 0.077 | -0.006 |
| lh_nen-lh_sch | 0.097 | -0.107 |
| lh_nen-lh_erl | 0.136 | 0.025 |
| lh_nen-lh_wfr | 0.031 | 0.011 |
| lh_nen-lh_bfr | 0.031 | -0.021 |
| lh_nen-lh_wno | 0.057 | -0.046 |
| lh_nen-lh_ano | 0.056 | 0.004 |
| lh_nen-sh_auf | 0.099 | 0.152 |
| lh_nen-sh_mel | 0.063 | -0.033 |
| lh_nen-sh_fra | 0.029 | -0.035 |
| lh_nen-sh_not | -0.012 | -0.046 |
| lh_sch-lh_erl | 0.113 | 0.148 |
| lh_sch-lh_wfr | 0.088 | 0.077 |
| lh_sch-lh_bfr | -0.073 | 0.091 |
| lh_sch-lh_wno | 0.464 | 0.377 |
| lh_sch-lh_ano | 0.349 | 0.292 |
| lh_sch-sh_auf | 0.105 | 0.11 |
| lh_sch-sh_mel | 0.002 | -0.004 |
| lh_sch-sh_fra | -0.055 | 0.074 |
| lh_sch-sh_not | 0.544 | 0.585 |
| lh_erl-lh_wfr | 0.21 | 0.143 |
| lh_erl-lh_bfr | 0.183 | 0.151 |
| lh_erl-lh_wno | 0.03 | 0.235 |
| lh_erl-lh_ano | 0.136 | 0.208 |
| lh_erl-sh_auf | 0.307 | 0.111 |
| lh_erl-sh_mel | 0.176 | 0.098 |
| lh_erl-sh_fra | 0.143 | 0.15 |
| lh_erl-sh_not | 0.095 | 0.143 |
| lh_wfr-lh_bfr | 0.155 | 0.078 |
| lh_wfr-lh_wno | 0.139 | 0.165 |
| lh_wfr-lh_ano | 0.15 | 0.189 |
| lh_wfr-sh_auf | 0.085 | 0.185 |
| lh_wfr-sh_mel | 0.324 | 0.089 |
| lh_wfr-sh_fra | 0.137 | 0.048 |
| lh_wfr-sh_not | 0.126 | 0.052 |
| lh_bfr-lh_wno | -0.071 | 0.133 |
| lh_bfr-lh_ano | -0.002 | 0.132 |
| lh_bfr-sh_auf | 0.074 | -0.001 |
| lh_bfr-sh_mel | 0.578 | 0.746 |
| lh_bfr-sh_fra | 0.937 | 0.918 |
| lh_bfr-sh_not | -0.069 | 0.1 |
| lh_wno-lh_ano | 0.637 | 0.556 |
| lh_wno-sh_auf | 0.07 | 0.12 |
| lh_wno-sh_mel | 0.05 | 0.008 |
| lh_wno-sh_fra | -0.044 | 0.122 |
| lh_wno-sh_not | 0.793 | 0.504 |
| lh_ano-sh_auf | 0.104 | 0.253 |
| lh_ano-sh_mel | 0.093 | 0.081 |
| lh_ano-sh_fra | 0.017 | 0.133 |
| lh_ano-sh_not | 0.702 | 0.44 |
| sh_auf-sh_mel | 0.059 | -0.051 |
| sh_auf-sh_fra | 0.084 | -0.001 |
| sh_auf-sh_not | 0.105 | 0.131 |
| sh_mel-sh_fra | 0.565 | 0.753 |
| sh_mel-sh_not | 0.053 | 0.005 |
| sh_fra-sh_not | -0.05 | 0.087 |
# Pooled correlation between homework-assignment start time and duration.
# NOTE(review): `imp45` is created elsewhere — presumably the imputations for
# 45-minute lessons; confirm against the chunk that defines it.
miceadds::micombine.cor(imp45, variables = c("beg_hw_min", "dau_hw_min"))
variable1 variable2 r rse fisher_r fisher_rse
1 beg_hw_min dau_hw_min -0.3256192 0.04471457 -0.33792 0.05000159
2 dau_hw_min beg_hw_min -0.3256192 0.04471457 -0.33792 0.05000159
fmi t p lower95 upper95
1 0.03852543 -6.758185 1.397312e-11 -0.4102579 -0.2354189
2 0.03852543 -6.758185 1.397312e-11 -0.4102579 -0.2354189
# Same pooled correlation for `imp90` — presumably the imputations for
# 90-minute lessons; confirm against the chunk that defines it.
miceadds::micombine.cor(imp90, variables = c("beg_hw_min", "dau_hw_min"))
variable1 variable2 r rse fisher_r fisher_rse
1 beg_hw_min dau_hw_min -0.484616 0.09540839 -0.5289995 0.1246324
2 dau_hw_min beg_hw_min -0.484616 0.09540839 -0.5289995 0.1246324
fmi t p lower95 upper95
1 0.009568712 -4.244477 2.191038e-05 -0.6488296 -0.277272
2 0.009568712 -4.244477 2.191038e-05 -0.6488296 -0.277272
# p_data_dm <- p_data %>%
# dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
# dicho_r_m <- data.frame()
#
# # Correlation matrix & visualization
# for(i in 1:7) {
# for(j in 1:7){
# # print(c(i,j))
# tmp <- matrix(table(p_data_dm[,i], p_data_dm[,j]), 2, 2)
# dicho_r_m[j,i] <- Yule(tmp) #Alternative phi
# dicho_r_m[i,j] <- Yule(tmp) #Alternative phi
# }
# }
#
# names(dicho_r_m) <- c("lh_ank", "lh_auf", "lh_sch", "lh_erl", "lh_wfr", "lh_wno", "lh_ano")
# row.names(dicho_r_m) <- c("lh_ank", "lh_auf", "lh_sch", "lh_erl", "lh_wfr", "lh_wno", "lh_ano")
# # corrgram(dicho_r_m, type = "cor", lower.panel = "panel.pie", upper.panel = "panel.cor")
#
# # GS vs. GYM
#
# dicho_r_gs <- data.frame(cor_gs = NA, comb = NA)
# dicho_r_gs <- dicho_r_gs[-1,]
#
# p_data_dm_gs <- p_data %>%
# dplyr::filter(Schulart == "Grundschule") %>%
# dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
#
# for(i in 1:6) {
# for(j in (1+i):7){
# tmp <- matrix(table(p_data_dm_gs[,i], p_data_dm_gs[,j]), 2, 2)
# tmp1 <- data.frame()
# tmp1[1, "cor_gs"] <- Yule(tmp)
# tmp1[1, "comb"] <- paste(names(p_data_dm_gs[i]), "-", names(p_data_dm_gs[j]))
# dicho_r_gs <- rbind(dicho_r_gs, tmp1)
# }
# }
#
#
#
#
# dicho_r_gy <- data.frame(cor_gy = NA, comb = NA)
# dicho_r_gy <- dicho_r_gy[-1,]
#
# p_data_dm_gy <- p_data %>%
# dplyr::filter(Schulart == "Gymnasium") %>%
# dplyr::select(lh_ank, lh_auf, lh_sch, lh_erl, lh_wfr, lh_wno, lh_ano)
#
# for(i in 1:6) {
# for(j in (1+i):7){
# tmp <- matrix(table(p_data_dm_gy[,i], p_data_dm_gy[,j]), 2, 2)
# tmp1 <- data.frame()
# tmp1[1, "cor_gy"] <- Yule(tmp)
# tmp1[1, "comb"] <- paste(names(p_data_dm_gy[i]), "-", names(p_data_dm_gy[j]))
# dicho_r_gy <- rbind(dicho_r_gy, tmp1)
# }
# }
#
# dicho_r <- full_join(dicho_r_gs, dicho_r_gy, by="comb")
#
#
#
# ggplot(dicho_r, aes(x=cor_gs, y=cor_gy, color = comb, label = comb)) +
# geom_point() +
# scale_x_continuous(limits = c(0,1)) +
# scale_y_continuous(limits = c(0,1)) +
# geom_abline(intercept = 0, slope = 1) +
# geom_label_repel() +
# theme(legend.position = "none")